---
title: "LDA"
author: "Kohei Watanabe"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
output: html_document
---

```{r, include=FALSE}
# Show the code of every chunk in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the document fail later at the first use.
library(topicmodels)
library(knitr)
library(ldatuning)
# NOTE(review): functions.R presumably defines the compact_terms() and frex()
# helpers called in later chunks -- confirm.
source("functions.R")
# Previously saved tuning results, plotted in the next chunk.
tune <- readRDS("lda_tuning.RDS")
```

```{r}
# Plot the tuning results that were loaded from lda_tuning.RDS.
ldatuning::FindTopicsNumber_plot(values = tune)
```

```{r, asis=TRUE}
# Load the saved model; lda_k20 is reused by the evaluation chunk further down.
lda_k20 <- readRDS("lda_k20.RDS")

# Table built from the 20 terms requested per topic.
top_terms_k20 <- compact_terms(terms(lda_k20, 20))
kable(top_terms_k20, col.names = "terms")

# Same table layout for the output of frex().
# NOTE(review): frex() is not defined in this file; presumably it comes from
# functions.R -- confirm what it ranks terms by.
frex_terms_k20 <- compact_terms(frex(lda_k20))
kable(frex_terms_k20, col.names = "terms")
```

```{r}
# Compare the LDA output with the human coding.
dat_human <- read.csv("data/HumanCoding.csv", stringsAsFactors = FALSE)
dat_topic <- read.csv("class-topic.csv", stringsAsFactors = FALSE)

# Look up the class that topics() reports for each human-coded document.
dat_human$lda_class <- topics(lda_k20)[dat_human$doc_id]
# Translate the class into a topic label. The column name is spelled out in
# full: the earlier `lda_clas` only resolved through partial matching of `$`.
# NOTE(review): dat_topic$topic is indexed by position here, so this assumes
# the rows of class-topic.csv are ordered by class number -- confirm.
dat_human$lda_topic <- dat_topic$topic[dat_human$lda_class]
table(dat_human$lda_topic)

# Keep only the documents that received a topic label before scoring.
dat_human <- dat_human[!is.na(dat_human$lda_topic), , drop = FALSE]
accu <- newsmap::accuracy(dat_human$lda_topic, dat_human$coding)
print(accu)
# Average of the f1 values in the accuracy result.
mean(accu$f1)
```

```{r, asis=TRUE}
# Load the model saved as slda_knowledge.RDS and tabulate the 20 terms
# requested per topic.
slda <- readRDS("slda_knowledge.RDS")
top_terms_slda <- compact_terms(terms(slda, 20))
kable(top_terms_slda, col.names = "terms")
```

